pacman::p_load(fst,
               fs,
               lubridate,
               tidyverse
)

#' Check whether the fitted-training output directory for a model exists.
#'
#' The directory name is assembled from `model` and the global settings
#' `SPECIAL_SUFFIX`, `TRAINING_SUFFIX`, `RAND_NO_CID_SMALLEST_LARGEST` and
#' `MODEL_METRIC`.
#'
#' @param model Model name used in the directory name.
#' @return The result of `dir_exists()` for that directory.
Determine_Data_Available <- function(model) {
  dir_fitted <- paste0(
    "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/",
    "fitted_training_", model, "_",
    TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC, "/"
  )

  has_fitted_dir <- dir_exists(dir_fitted)
  has_fitted_dir
}

# Flag which candidate models have a fitted-training directory on disk, then
# keep only those models.
v_existing_models <- map_lgl(V_POTENTIAL_MODELS, Determine_Data_Available)
v_models <- V_POTENTIAL_MODELS[v_existing_models]

# Calendar years spanned by the training window (inclusive on both ends).
start_year <- year(TRAINING_START_QTR)
end_year <- year(TRAINING_END_QTR)
v_years <- start_year:end_year

# Unique (state, state_code) pairs from the FIPS table.
df_fips <- distinct(read_csv("../../data/fips.csv"), state, state_code)

#' Load the fitted values of every available model for one calendar year.
#'
#' For each model in the global `v_models`, reads every file in that model's
#' fitted-training directory whose name carries an 8-digit date falling in
#' `year`, stacks the files, renames the third column to the model name
#' (presumably the fitted value -- confirm against the fst schema), and drops
#' rows with a missing `cid`. The per-model frames are then left-joined on
#' `cid` and `qtr`, in the order of `v_models`.
#'
#' @param year Calendar year to load.
#' @return A data frame keyed by `cid` and `qtr` with one renamed column per
#'   model.
Load_Year_Fitted <- function(year) {

  # Reads and tidies the fitted values of a single model for `year`.
  read_model_year <- function(model_name) {
    dir_fitted <- paste0(
      "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "fitted_training_", model_name,
      "_", TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC, "/"
    )

    # Parse the quarter date (and the 4-digit id before ".fst") out of each
    # file name so files can be selected by year.
    fitted_files <- tibble(file_name = dir_ls(dir_fitted))
    fitted_files <- mutate(
      fitted_files,
      qtr = ymd(str_extract(file_name, "[0-9]{8}")),
      rand_no_cid = str_extract(file_name, "[0-9]{4}(?=.fst)"),
      qtr_year = year(qtr)
    )
    fitted_files <- filter(fitted_files, qtr_year == year)

    stacked <- map_dfr(fitted_files$file_name, read_fst)
    stacked <- rename(stacked, "{model_name}" := 3)
    filter(stacked, !is.na(cid))
  }

  l_fitted <- map(v_models, read_model_year)
  reduce(l_fitted, left_join, by = c("cid", "qtr"))

}

#' Load the location columns of the raw training data for one calendar year.
#'
#' Reads `cid`, `qtr`, `state`, `county_code`, `census_tract` and
#' `census_block` from the training fst file, adds a `year` column derived from
#' `qtr`, and keeps only the rows whose year equals the requested one.
#'
#' @param year Calendar year to keep.
#' @return A data frame with the six columns read plus `year`, restricted to
#'   rows from `year`.
Load_Raw_Training <- function(year) {
  
  path_training <- paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "train_", TRAINING_SUFFIX,
                          "_", RAND_NO_CID_SMALLEST_LARGEST, "/", "training.fst")
  
  read_fst(path_training, columns = c("cid", "qtr", "state", "county_code", "census_tract", "census_block")) %>% 
    mutate(
      year = lubridate::year(qtr)
    ) %>% 
    # The new `year` column masks the function argument of the same name, so a
    # bare `year == year` compares the column with itself and filters nothing.
    # The pronouns make the column-vs-argument comparison explicit.
    filter(.data$year == .env$year)
  
}

#' Load one year of CRA data and attach the state identifier from `df_fips`.
#'
#' Reads `../../data/CRA/final/<data_year>.csv`, drops rows with a missing
#' `Income_Level`, left-joins the global `df_fips` by matching `State` to
#' `state_code`, and removes the `State` column afterwards.
#'
#' @param data_year Year used to build the CSV file name.
#' @return The filtered CRA data frame with the `df_fips` columns added and
#'   `State` removed.
Load_CRA <- function(data_year) {

  cra_raw <- read_csv(paste0("../../data/CRA/final/", data_year, ".csv"))

  cra_with_income <- filter(cra_raw, !is.na(Income_Level))
  cra_with_state <- left_join(cra_with_income, df_fips, by = c("State" = "state_code"))

  select(cra_with_state, -State)

}

#' Merge fitted values, training locations and CRA data for one year.
#'
#' Despite the name, nothing is written to disk here: the merged data frame is
#' returned to the caller.
#'
#' @param year Calendar year passed to `Load_CRA()`, `Load_Year_Fitted()` and
#'   `Load_Raw_Training()`.
#' @return The fitted values left-joined to the training rows on `cid`/`qtr`,
#'   then left-joined to the CRA data on state, county and tract.
Save_Merged_Race_Fitted_Values <- function(year) {

  df_CRA <- Load_CRA(year)
  df_fitted <- Load_Year_Fitted(year)
  df_training <- Load_Raw_Training(year)

  # Attach location columns to every fitted row first ...
  df_fitted_located <- left_join(df_fitted, df_training, by = c("cid", "qtr"))

  # ... then use those locations to look up the CRA record.
  left_join(
    df_fitted_located,
    df_CRA,
    by = c("state", "county_code" = "County", "census_tract" = "Tract")
  )

}

# Build the merged frame for every training year, keep only rows that matched
# a CRA income level, retain the columns of interest, and write the result to
# a single fst file.
v_years %>% 
  map_dfr(Save_Merged_Race_Fitted_Values) %>% 
  filter(!is.na(Income_Level)) %>% 
  select(
    cid, qtr, Year,
    xgb, logistic, riskscore,
    t_default, is_thick, Income_Level
  ) %>% 
  write_fst(
    path = paste0(
      "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/fitted_merged_training_", TRAINING_SUFFIX, 
      "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC, "/", "training_fitted_CRA.fst"
    )
  )










